# NOTE(review): this clears every object in the current environment. It is kept
# so the script behaves as before, but running in a fresh R session is safer.
rm(list = ls())

# library() stops with an error when a package is missing, whereas require()
# only warns and returns FALSE, which lets the script fail later with an
# unrelated message.
library(quanteda)
library(LSX)
library(quanteda.textmodels)
quanteda_options(threads = 60)

# NOTE(review): functions.R is project-local; presumably it supplies helpers
# and objects used further down -- confirm.
source("functions.R")

# Object read from disk; its `docid` element is used below to select documents.
dat_dict <- readRDS("data/data_dictionary.RDS")

# Load the tokens, keep only the documents whose `docid` appears in `dat_dict`,
# then remove tokens matching the regular expression "\\d" (with min_nchar = 2).
toks <- readRDS("data/data_tokens.RDS")
toks <- tokens_subset(toks, docid %in% dat_dict$docid)
toks <- tokens_remove(toks, "\\d", valuetype = "regex", min_nchar = 2)

# NOTE(review): `dict` is not defined in this script; presumably it is created
# by functions.R -- confirm.
# Context terms for the "target" dictionary entry (window = 10) and the seed
# words built from the "security" entry.
threat <- char_context(toks, dict["target"], window = 10)
seed <- as.seedwords(dict$security)

# Build the document-feature matrix. The empty-string feature "" is dropped
# with dfm_remove() rather than dfm(remove = ""), because the `remove`
# argument of dfm() is deprecated in quanteda 3 and not honoured in later
# releases. Features with a total frequency below 10 are then trimmed.
dfmt <- dfm(toks) %>%
  dfm_remove("", valuetype = "fixed") %>%
  dfm_trim(min_termfreq = 10)

# Fit the LSS model on the matrix using the seed words and the context terms.
lss <- textmodel_lss(dfmt, seed, threat, cache = TRUE, k = 300, slice = 150)

# Interactive model diagnostics: the results are not assigned, so these lines
# only produce output when their values are auto-printed or echoed.
# NOTE(review): cohesion() is not defined in this file -- presumably it comes
# from LSX or functions.R; confirm.
cohesion(lss)
# First and last 100 entries of the model's term coefficients.
head(coef(lss), 100)
tail(coef(lss), 100)

# NOTE(review): presumably draws a plot of the model's terms -- confirm
# against the LSX documentation.
textplot_terms(lss)

# Group the document-feature matrix (no `groups` given, so quanteda's default
# grouping applies) and take its document variables as the output table.
dfmt_group <- dfm_group(dfmt)
dat <- docvars(dfmt_group)

# Score the grouped documents. `newdata` is spelled out in full instead of
# relying on partial matching of the abbreviated `newdat`.
pred <- predict(lss, newdata = dfmt_group, density = TRUE, min_n = 5)

# Attach the fitted scores and the densities to the document variables.
dat$lss <- pred$fit
dat$density <- pred$density

# Persist the fitted model and the scored document table.
saveRDS(lss, "data/lss.RDS")
saveRDS(dat, "data/data_lss.RDS")
